/***********************************************************************************************************************************************************************
input datasets: 
public_history_plants : establishment-level data on subsidiaries, branches, and plants of U.S. firms from the National Establishment Time-Series (NETS) ;
ccmfunda8918: CRSP/Compustat Merged fundamentals annual data downloaded from WRDS ;
Enforcement_facility: facility-level information on EPA enforcement actions (from EPA);
Enforcements:information on EPA enforcement actions (from EPA);
Epanets_caseid: EPA enforcement actions case number merged with NETS data using dunsnumber;
ccmlinktable: CRSP and Compustat linking table downloaded from WRDS ;
tnic2 and tnic3: TNIC2 and TNIC3 peers downloaded from Hoberg and Phillips's website;
tri_harmful: plants' total harmful chemical releases (generated using data from EPA's Toxics Release Inventory (TRI) database);
RSEI_Score: plants' RSEI score downloaded from EPA;
Holdings_equityfund: mutual fund stock holding information from Thomson Reuters Mutual Fund Holdings database;
msf: stock monthly return and price information;
Escore_funds_msci_2groups: mutual fund portfolio firms' escore calculated following  Hwang, Titman, and Wang (2022) ;
Contact_info: fund contact information from CRSP mutual fund database;
inspections: EPA inspections of plants;
Pollution_control_treated: plant's pollution abatement activities from EPA;
output datasets: peertri.dta (for Table 2); peertrimf.dta (for Table 3); peertrimf_inspect.dta (for Table 4); peertrimf_process(for Table 7); plantmf_level(for Table 5)
****************************************************************************************************************************************************************************/

libname epadata "X:\data";
%include "X:\sas program\winsor.sas"; 
data plants;
set epadata.Public_history_plants;
Longitude=(-1)*Longitude;
 run;
 proc sort data=plants nodupkey; by gvkey DunsNumber year; run;

Proc sql;
create table enforce_plants as select a.*, b.ACTIVITY_STATUS_DATE, b.CASE_STATUS_DATE, b.TOTAL_PENALTY_ASSESSED_AMT, b.TOTAL_COST_RECOVERY_AMT, 
b.TOTAL_COMP_ACTION_AMT, b.REGION_CODE, b.ENF_OUTCOME_CODE, b.ENF_OUTCOME_DESC
from epadata.Enforcement_facility as a, epadata.Enforcements as b
where a.ACTIVITY_ID=b.ACTIVITY_ID and a.CASE_NUMBER=b.CASE_NUMBER; quit;

 proc sort data=enforce_plants nodupkey; by ACTIVITY_ID CASE_NUMBER FACILITY_UIN ACTIVITY_STATUS_DATE; run;

proc sql;
create table enforce_plants1 as select a.*, b.DunsNumber
from enforce_plants as a, epadata.Epanets_caseid as b
where a.ACTIVITY_ID=b.ACTIVITY_ID and a.CASE_NUMBER=b.CASE_NUMBER and a.FACILITY_UIN=b.REGISTRY_ID; quit;

data enforce_plants1;
set enforce_plants1;
year=year(ACTIVITY_STATUS_DATE);
run;

 proc sort data=enforce_plants1 out=enforce_plants2 nodupkey; by DunsNumber year ACTIVITY_STATUS_DATE; run;

 proc sql;
create table enforce_plants3 as select a.*, b.ACTIVITY_ID, b.TOTAL_PENALTY_ASSESSED_AMT, b.TOTAL_COST_RECOVERY_AMT, 
b.TOTAL_COMP_ACTION_AMT, b.REGION_CODE, b.ENF_OUTCOME_CODE, b.ENF_OUTCOME_DESC, b.ACTIVITY_STATUS_DATE, b.CASE_STATUS_DATE
from plants as a join enforce_plants2 as b
on a.DunsNumber=b.DunsNumber and a.year=b.year; quit;

 proc sort data=enforce_plants3 nodupkey; by gvkey DunsNumber year ACTIVITY_STATUS_DATE; run;


data enforce_plants3;
set enforce_plants3;
   format begdate enddate date9.;
begdate= mdy(1,1,year);
enddate= mdy(12,31,year);
   run;

proc sql;
            create table enforce_plants4
            as select a.*, b.lpermno
            from enforce_plants3 as a, epadata.ccmlinktable as b
            where a.gvkey = b.gvkey and
            b.LINKTYPE in ("LU","LC","LD","LF","LN","LO","LS","LX")
            and      (b.LINKDT<=a.ACTIVITY_STATUS_DATE) and (a.ACTIVITY_STATUS_DATE<=b.LINKENDDT or b.LINKENDDT=.E);
quit;

data enforce_plants4;
set enforce_plants4;
keep gvkey DunsNumber year ACTIVITY_ID TOTAL_PENALTY_ASSESSED_AMT TOTAL_COST_RECOVERY_AMT TOTAL_COMP_ACTION_AMT REGION_CODE
ENF_OUTCOME_CODE ENF_OUTCOME_DESC ACTIVITY_STATUS_DATE CASE_STATUS_DATE plant_zip plant_state FIPsCounty Latitude Longitude plant_NAICS plant_sic2 plant_sic2 
emp PayDexMin EstType LPERMNO ; 
run;

data enforce_plants4;
set enforce_plants4;
gvkey1=gvkey*1;
run;

/* match to peers*/

proc sql;
	create table tnic1a as
	select distinct a.*, b.gvkey2 as gvkey2_tnic3
	from epadata.tnic2 as a left join epadata.tnic3 as b
	on a.gvkey1 = b.gvkey1 and a.gvkey2=b.gvkey2 and a.year=b.year;
quit;

data tnic3;
set tnic1a;
if gvkey1^=gvkey2;
if gvkey2 = gvkey2_tnic3 then tnic3 = 1;
else tnic3 = 0;
drop score;
run;

proc sql;
create table peer1 as select distinct a.*, b.*
from tnic3 as a, enforce_plants4 as b
where a.gvkey1=b.gvkey1 and a.year=b.year;
quit;

data peer2;
set peer1;
keep gvkey2 gvkey1 gvkey2_tnic3 tnic3 gvkey DunsNumber year Latitude Longitude;
run;

data plants;
set plants;
gvkey1=gvkey*1;
run;
/* match to tnic2 peers' plants*/
proc sql;
create table peer3 as select distinct a.*, b.DunsNumber as DunsNumber2, b.Latitude as Latitude2, b.Longitude as Longitude2
from peer2 as a, plants  as b
where a.gvkey2=b.gvkey1 and a.year=b.year; quit;

/* calculate distance*/
data peer3;
set peer3;
distance=geodist(Latitude,Longitude,Latitude2,Longitude2, 'M');
run;

data nets_tri0;
 set epadata.Tri_harmful;
 gvkey2=gvkey*1;
 run;
proc sql;
create table nets_tri1 as select a.*, b.RSEI_Score, b.RSEI_Score_Cancer, b.RSEI_Score_Noncancer
from nets_tri0 as a left join epadata.RSEI_Score as b
on a.TRIFD=b.TRI_Facility_ID and a.year=b.Submission_Year; quit;

data tri_prod;
set epadata.Tri_chemical_plants_prod_ratio;
run;

proc sql;
create table tri_prod1 as select distinct a.gvkey, a.DunsNumber, a.year, mean(production_ratio*harmful) as prod_ratio_harm, mean(production_ratio) as prod_ratio
from tri_prod as a
group by gvkey, DunsNumber, year; quit;

proc sql;
create table nets_tri as select a.*, b.prod_ratio, b.prod_ratio_harm
from nets_tri1 as a left join tri_prod1 as b
on a.gvkey=b.gvkey and a.DunsNumber=b.DunsNumber and a.year=b.year; quit;

proc sql;
create table tri1 as select distinct a.*
from peer3 as a, nets_tri  as b
where a.gvkey2=b.gvkey2 and a.DunsNumber2=b.DunsNumber and a.year=b.year; quit;
/*each year, only keep one obs for a plant with the shortest distance*/

proc sort data=tri1 nodupkey; by gvkey2 DunsNumber2 year DunsNumber; run;

data tri1;
set tri1;
closetemp = 0;
if distance <= 100 then closetemp = 1;
run;

proc sql;
 create table fundcontact as select a.*, b.wficn, b.fund_name
 from epadata.Contact_info as a, epadata.Crsp_fundno_wficn as b
 where a.CRSP_FUNDNO=b.CRSP_FUNDNO; quit;

data fundcontact;
set fundcontact;
zip1=ZIP*1;
run;

proc sql;
create table fundcontact1 as select a.*, b.Y as Latitude1, b.X as Longitude1
from fundcontact as a, epadata.zip_city as b
where a.zip1=b.zip; quit;

proc sql;
create table fundcontact2 as select a.*, b.state_fips
from fundcontact1 as a, epadata.state_fips as b
where a.state=b.state; quit;


proc sql;
create table mfholding as select a.*, b.ZIP, b.Latitude1, b.Longitude1, b.fund_name, b.CRSP_FUNDNO, b.state_fips
from epadata.Holdings_equityfund  as a, fundcontact2 as b
where a.wficn=b.wficn and a.fdate>=b.CHGDT and a.fdate<=b.CHGENDDT; quit;

proc sort data=mfholding nodupkey; by wficn rdate PERMNO ZIP; run;

proc sql;
create table mfholding1 as select a.*, b.SR_funds
from mfholding as a, epadata.Escore_funds_msci_2groups as b
where a.wficn=b.wficn and year(a.rdate)=b.year; quit;

/*each fund-firm, keep the last obs for each year */
data mfholding1;
set mfholding1;
year=year(rdate);
run;

proc sort data=mfholding1 ; by wficn PERMNO year rdate; run;

proc sql;
            create table mfholding2
            as select a.*, b.gvkey
            from mfholding1 as a, epadata.ccmlinktable as b
            where a.permno = b.lpermno and
            b.LINKTYPE in ("LU","LC","LD","LF","LN","LO","LS","LX")
            and      (b.LINKDT<=a.rdate) and (a.rdate<=b.LINKENDDT or b.LINKENDDT=.E);
quit;

data mfholding2;
set mfholding2;
gvkey1=gvkey*1;
run;

data mfholding3;
set mfholding2;
if SR_funds=1;
run;

proc sort data=mfholding3 ; by wficn PERMNO year rdate; run;
data mfholding3b;
set mfholding3;
by wficn PERMNO year rdate ;
if last.year;
run;

proc sort data=mfholding3b; by wficn PERMNO year; run;
data mfholding3b;
set mfholding3b;
by wficn PERMNO year; 
lag_shares_adj=lag(shares_adj);
if first.PERMNO then lag_shares_adj=.;
run;

data shares1;
set epadata.msf;
if PRC^=.; 
PRC=abs(PRC);
if CFACPR^=0 then adjprc=PRC/CFACPR;
if CFACPR=0 or CFACPR=. then adjprc=PRC;
if CFACSHR^=0 then adjSHROUT=SHROUT*CFACSHR;
if CFACPR=0 or CFACPR=. then adjSHROUT=SHROUT;
year=year(date);
keep permno date year adjshrout shrout;
proc sort nodupkey; by permno year date;
run;

proc sql;
	create table mfholding4 as 
	select distinct a.*, b.adjshrout, a.shares_adj/(b.adjshrout*1000) as pctshares
	from mfholding3b as a, shares1 as b
	where a.permno = b.permno and a.year=year(b.date) and month(a.rdate) = month(b.date);
quit;

data mfholding4;
	set mfholding4;
	if pctshares < 0 then pctshares = 0;
	if pctshares > 1 then pctshares = 1;
	if pctshares = . then pctshares = 0;
run;

proc sql;
create table trimf1 as select a.*, b.wficn as wficn2, b.rdate as rdate2, b.Latitude1 as Latitude2mf, b.Longitude1 as Longitude2mf, b.shares_orginal, b.shares_adj, b.pctshares, b.lag_shares_adj
from tri1 as a left join mfholding4 as b
on a.gvkey2=b.gvkey1 and a.year=b.year; quit;


data trimf1;
set trimf1;
distance2mf=geodist(Latitude2mf,Longitude2mf,Latitude2,Longitude2, 'M');
distance1mf=geodist(Latitude2mf,Longitude2mf,Latitude,Longitude, 'M');
run;

data trimf1;
set trimf1;
if pctshares=. then pctshares=0;
closemf0 = 0;
if distance2mf <=100 and distance2mf^=.  and distance1mf<=100 and distance1mf^=. and pctshares>0.005 and  lag_shares_adj^=. then closemf0 = 1;
closemf1 = 0;
if distance1mf<=200 and distance1mf>100  and distance1mf^=. and distance2mf<=200 and distance2mf>100 and pctshares>0.005 and  lag_shares_adj^=. then closemf1 = 1; /*close to enforced firm only*/
closemf2 = 0;
if distance1mf<=200 and distance1mf>150  and distance1mf^=. and distance2mf<=200 and distance2mf>150 and pctshares>0.005 and  lag_shares_adj^=. then closemf2 = 1; /*close to enforced firm only*/
closemf3 = 0;
if  distance1mf>200  and distance1mf^=. and distance2mf>200  and distance2mf^=. and pctshares>0.005 and  lag_shares_adj^=. then closemf3 = 1; 
run;

proc sort data=trimf1 nodupkey; by  gvkey2 DunsNumber2  gvkey1 DunsNumber wficn2 year; run;

proc sql;
create table trimf3 as select distinct a.gvkey2, a.DunsNumber2, a.year, a.tnic3, a.closetemp, sum(closemf0) as closemfcount, 
sum(closemf1) as closemfcount1, sum(closemf2) as closemfcount2, sum(closemf3) as closemfcount3
from trimf1 as a
group by gvkey2, DunsNumber2,  year, tnic3, closetemp; quit;

data trimf3;
set trimf3;
closemf=0;
if closemfcount>0 and closemfcount^=. then closemf=1;
closemf1=0;
if closemfcount1>0 and closemfcount1^=. then closemf1=1;
closemf2=0;
if closemfcount2>0 and closemfcount2^=. then closemf2=1;
closemf3=0;
if closemfcount3>0 and closemfcount3^=. then closemf3=1;
run;
data trimf3;
set trimf3;
if closemf=1 then closemf1=0;
if closemf=1 then closemf2=0;
if closemf=1 then closemf3=0;
if closemf1=1 then closemf2=0;
if closemf2=1 then closemf3=0;
if closemf1=1 then closemf3=0;
run;

proc sort data=trimf3; by gvkey2 DunsNumber2 year tnic3 closetemp; run;

data tri2;
set trimf3;
by gvkey2 DunsNumber2 year tnic3 closetemp; 
if last.tnic3;
run;

proc sort data=tri2; by gvkey2 DunsNumber2 year descending tnic3; run;
proc sort data=tri2 out=tri3 nodupkey; by gvkey2 DunsNumber2 year; run;

data tri3;
set tri3;
rename tnic3=treatment;
close = 0;
if closetemp =1  then close = 1;
run;

/*get all firm-year from NETS*/
proc sort data=nets_tri out=firms nodupkey; by gvkey DunsNumber year; run;
proc sort data=tri3 out=firms1 nodupkey; by gvkey2 DunsNumber2; run;
proc sql;
create table firms2 as select a.*
from firms as a, firms1 as b
where a.gvkey2=b.gvkey2 and a.DunsNumber=b.DunsNumber2; quit;

proc sql;
create table tri4 as select a.gvkey, a.year, a.onsite_release, a.offsite_release, a.total_releases, a.RSEI_Score, a.RSEI_Score_Cancer, a.RSEI_Score_Noncancer, 
a.prod_ratio, a.prod_ratio_harm, a.DunsNumber, b.year as event_year, b.*
from firms2 as a left join tri3 as b
on a.gvkey2=b.gvkey2 and a.DunsNumber=b.DunsNumber2 and a.year>b.year and a.year<=b.year+3; quit;

data tri4; 
set tri4;
drop gvkey1;
run;

data tri4; 
set tri4;
post=0;
if event_year^=. then post=1;
if treatment=. then treatment=0;
if close=. then close=0;
if closemf=. then closemf=0;
if closemf1=. then closemf1=0;
if closemf2=. then closemf2=0;
if closemf3=. then closemf3=0;
run;

data tri5;
set tri4;
treat_close_post=treatment*close*post;
treat_close_post_closemf=treatment*close*post*closemf;
treat_close_post_closemf1=treatment*close*post*closemf1;
treat_close_post_closemf2=treatment*close*post*closemf2;
treat_close_post_closemf3=treatment*close*post*closemf3;
run;

proc sort data=tri5; by gvkey DunsNumber year descending treat_close_post; run;
proc sort data=tri5 nodupkey; by gvkey DunsNumber year ; run;

data event;
set tri5;
if event_year^=. and treatment=1 and close=1; run;

proc sort data=event nodupkey; by gvkey DunsNumber event_year ; run;
data event;
set event;
by gvkey DunsNumber event_year ;
firstevent=0;
if first.DunsNumber then firstevent=1;
run;
proc sql;
create table tri5a as select a.*, b.event_year as event_year610, b.firstevent
from tri5 as a left join event as b
on a.gvkey=b.gvkey and a.DunsNumber=b.DunsNumber; quit;

data tri5b;
set tri5a;
pre_treat=0;
if treat_close_post=0 and event_year610^=. and year-event_year610>3 then pre_treat=1;
run;
proc sort data=tri5b; by gvkey DunsNumber year treat_close_post descending pre_treat; run;
proc sort data=tri5b nodupkey; by gvkey DunsNumber year treat_close_post; run;
/*match firm controls*/
proc sql;
 create table ccmfunda as select a.*, b.invt
 from epadata.ccmfunda8918 as a, epadata.invt8918 as b
 where a.gvkey=b.gvkey and a.lpermno=b.lpermno and a.fyear=b.fyear and a.datadate=b.datadate; quit;
proc sort data=ccmfunda nodupkey; by gvkey fyear; run;
data ccmfunda1; set ccmfunda;
by gvkey fyear;
AT_1=lag(AT);
if first.gvkey then AT_1=.;
sale_1=lag(sale);
if first.gvkey then sale_1=.;
run;

data ccmfunda2;
set ccmfunda1;
gvkey1=gvkey*1;
BLEV=(DLTT+DLC)/AT;
MLEV=(DLTT+DLC)/(DLTT+DLC+CSHO*abs(PRCC_F));
LEV=(DLTT+DLC)/(AT-CEQ+abs(PRCC_F)*CSHO);
sale_growth=(sale-sale_1)/sale_1;
SIZE=log(abs(PRCC_F)*CSHO);
BM=CEQ/(abs(PRCC_F)*CSHO);
ROA=OIBDP/AT;
lnSale=log(sale);
MB=(abs(PRCC_F)*CSHO)/CEQ;
ROA=OIBDP/AT;
RDS=XRD/Sale;
ADS=XAD/Sale;
TobinQ=(abs(PRCC_F)*CSHO+DLTT+DLC)/AT;
ATN=PPENT/AT;
Cash=CHE/AT_1;
CF=(IB+DP)/AT_1;
Inventory=invt/sale_1;
run;

proc sql;
create table tri6 as select a.gvkey, a.year, a.close, a.treatment, a.treat_close_post, a.post, a.DunsNumber, a.onsite_release, a.offsite_release, a.total_releases, a.event_year, 
a.event_year610, a.firstevent, a.pre_treat, a.closemf1, a.closemf2, a.closemf3, a.RSEI_Score, a.RSEI_Score_Cancer, a.RSEI_Score_Noncancer, a.prod_ratio, a.prod_ratio_harm, 
a.treat_close_post_closemf, a.closemf, a.treat_close_post_closemf1, a.treat_close_post_closemf2, a.treat_close_post_closemf3,
b.gvkey1, b.lpermno, b.sic, b.sale_growth, b.size, b.bm, b.roa, b.mlev, b.blev, b.LEV, b.TobinQ, b.atn, b.lnsale, b.COGS, b.Inventory
from tri5b as a left join ccmfunda2 as b
on a.gvkey=b.gvkey and a.year=b.fyear; quit;

proc sort data=enforce_plants4 out=enforce_firm nodupkey; by gvkey DunsNumber year; run;

proc sql;
create table tri7 as select a.*, b.year as enforce_year
from tri6 as a left join enforce_firm as b
on a.gvkey=b.gvkey and a.DunsNumber=b.DunsNumber and a.year>b.year and a.year<=b.year+3; quit;

data tri7;
set tri7;
self_enforce=0;
if enforce_year^=. then self_enforce=1; run;
proc sort data=tri7; by gvkey DunsNumber year descending self_enforce; run;
proc sort data=tri7 nodupkey; by gvkey DunsNumber year; run;

data tri8;
set tri7;
sic3=int(sic/10);
sic2=int(sic/100);
if  SIC<6000 or SIC>6999;
if year<=2015;
ln_onsite=log(1+onsite_release);
ln_offsite=log(1+offsite_release);
ln_totaltri=log(1+total_releases);
run;

%winsor(dsetin=tri8, dsetout=peertri1, byvar=none, vars= RSEI_Score RSEI_Score_Cancer RSEI_Score_Noncancer ln_onsite  ln_offsite ln_totaltri size bm roa lev TobinQ  atn lnsale, pctl=1  99);	

proc sql;
create table peertri as select a.*, b.emp, b.plant_sic2, b.FIPsCounty, b.plant_NAICS
from peertri1 as a, plants as b
where a.gvkey=b.gvkey and a.dunsnumber=b.dunsnumber and a.year=b.year; quit;

data peertri;
set peertri;
onsite1=onsite_release;
offsite1=offsite_release;
totaltri1=total_releases;
run;

proc sql;
	create table p1a as
	select *, mean(onsite1) as avgonsite1, std(onsite1) as sdonsite1, 
	mean(offsite1) as avgoffsite1, std(offsite1) as sdoffsite1, 
	mean(totaltri1) as avgtotaltri1, std(totaltri1) as sdtotaltri1
	from peertri
	group by sic2, year;
quit;

data p1b;
	set p1a;
	sdonsite = (onsite1-avgonsite1)/sdonsite1;
	sdoffsite = (offsite1-avgoffsite1)/sdoffsite1;
	sdtotaltri = (totaltri1-avgtotaltri1)/sdtotaltri1;
run;

data p1d;
set p1b;
if self_enforce=1 then delete;
 run;

 
 data treat;
 set p1d;
 if treat_close_post=1;
 run;
proc sort data=treat out=cohort nodupkey; by event_year; run;
data cohort;
set cohort;
if event_year>=1993 and event_year<=2012;
keep event_year;
run;


 data cohort1;
 set cohort;
 by event_year; 
Cohort=_N_;
 run;

proc sort data=treat out=treat1 nodupkey; by gvkey dunsnumber event_year; run;

proc sql;
create table treat2 as select a.*, b.Cohort
from treat1 as a, cohort1 as b
where a.event_year=b.event_year; quit;

proc sql;
create table treat3 as select a.*, b.Cohort, b.event_year as event_year1
from p1d as a, treat2 as b
where a.gvkey=b.gvkey and a.dunsnumber=b.dunsnumber and a.year>=b.event_year-3 and a.year<=b.event_year+3; quit;

proc sort data=treat3 nodupkey; by gvkey dunsnumber year cohort; run;

data treat3;
set treat3;
if year<event_year1 then post1=0;
if year>event_year1 then post1=1;
pre1=0;
if event_year1-year=1 then pre1=1;
pre2=0;
if event_year1-year=2 then pre2=1;
pre3=0;
if event_year1-year=3 then pre3=1;
run;


data control;
set p1d;
if treat_close_post=0;
if pre_treat=0;
run;

proc sql;
create table control1 as select a.*, b.event_year as event_year1
from control as a left join treat2 as b
on a.gvkey=b.gvkey and a.dunsnumber=b.dunsnumber and a.year>=b.event_year-6 and a.year<=b.event_year+6; quit; 

data control2;
set control1;
if event_year1^=. then delete;
run;

data control2;
set control2;
drop event_year1;
run;

proc sql;
create table control3 as select a.*, b.cohort, b.event_year as event_year1
from control2 as a, cohort1 as b
where a.year>=b.event_year-3 and a.year<=b.event_year+3; quit;

proc sort data=control3 nodupkey; by gvkey dunsnumber year cohort; run;
data control3;
set control3;
if year<event_year1 then post1=0;
if year>event_year1 then post1=1;
treat1=0;
pre1=0;
if event_year1-year=1 then pre1=1;
pre2=0;
if event_year1-year=2 then pre2=1;
pre3=0;
if event_year1-year=3 then pre3=1;
run;
data treat3;
set treat3;
treat1=1;
run;

data p1e;
set treat3 control3;
if post1^=.;
treat_close_pre1=treat1*pre1;
treat_close_pre2=treat1*pre2;
treat_close_pre3=treat1*pre3;
run;
data p1e;
set p1e;
treat_post1=treat1*post1;
lnrsei_score=log(rsei_score+1);
run;

/*Table 1: summary statistics*/
proc means data=p1e n mean p5  p25 p40 median p60 p75 p95 std;
var sdonsite  lnrsei_score treat_post1 size bm roa   lnsale lev; 
run;

 PROC EXPORT DATA=p1e
            FILE="X:\Project_EPA\data\peertri.dta"
            DBMS=STATA REPLACE;
RUN;


 data treat;
 set p1d;
 if treat_close_post=1;
 run;
proc sort data=treat out=cohort nodupkey; by event_year; run;
data cohort;
set cohort;
if event_year>=2003 and event_year<=2012;
keep event_year;
run;


 data cohort1;
 set cohort;
 by event_year; 
Cohort=_N_;
 run;

proc sort data=treat out=treat1 nodupkey; by gvkey dunsnumber event_year; run;

proc sql;
create table treat2 as select a.*, b.Cohort
from treat1 as a, cohort1 as b
where a.event_year=b.event_year; quit;

proc sql;
create table treat3 as select a.*, b.Cohort, b.event_year as event_year1
from p1d as a, treat2 as b
where a.gvkey=b.gvkey and a.dunsnumber=b.dunsnumber and a.year>=b.event_year-3 and a.year<=b.event_year+3; quit;

proc sort data=treat3 nodupkey; by gvkey dunsnumber year cohort; run;

data treat3;
set treat3;
if year<event_year1 then post1=0;
if year>event_year1 then post1=1;
pre1=0;
if event_year1-year=1 then pre1=1;
pre2=0;
if event_year1-year=2 then pre2=1;
pre3=0;
if event_year1-year=3 then pre3=1;
run;


data control;
set p1d;
if treat_close_post=0;
if pre_treat=0;
run;

proc sql;
create table control1 as select a.*, b.event_year as event_year1
from control as a left join treat2 as b
on a.gvkey=b.gvkey and a.dunsnumber=b.dunsnumber and a.year>=b.event_year-6 and a.year<=b.event_year+6; quit; 

data control2;
set control1;
if event_year1^=. then delete;
run;

data control2;
set control2;
drop event_year1;
run;

proc sql;
create table control3 as select a.*, b.cohort, b.event_year as event_year1
from control2 as a, cohort1 as b
where a.year>=b.event_year-3 and a.year<=b.event_year+3; quit;

proc sort data=control3 nodupkey; by gvkey dunsnumber year cohort; run;
data control3;
set control3;
if year<event_year1 then post1=0;
if year>event_year1 then post1=1;
treat1=0;
pre1=0;
if event_year1-year=1 then pre1=1;
pre2=0;
if event_year1-year=2 then pre2=1;
pre3=0;
if event_year1-year=3 then pre3=1;
run;
data treat3;
set treat3;
treat1=1;
run;

data p1e;
set treat3 control3;
if post1^=.;
treat_close_pre1=treat1*pre1;
treat_close_pre2=treat1*pre2;
treat_close_pre3=treat1*pre3;
treat_pre1_closemf=treat1*pre1*closemf;
treat_pre2_closemf=treat1*pre2*closemf;
treat_pre3_closemf=treat1*pre3*closemf;
run;
data p1e;
set p1e;
treat_post1=treat1*post1;
lnrsei_score=log(rsei_score+1);
run;
 PROC EXPORT DATA=p1e
            FILE="X:\Project_EPA\data\peertrimf.dta"
            DBMS=STATA REPLACE;
RUN;

data Inspections1;
set epadata.inspections;
year=year(inspection_date);
run;

proc sql;
create table inspections as select distinct a.dunsnumber, a.year, count(distinct inspection_date) as number_of_inspections
from Inspections1 as a
group by dunsnumber, year; quit;

proc sort data=inspections; by dunsnumber year descending number_of_inspections; run;
proc sort data=inspections nodupkey; by dunsnumber year ; run;

proc sql;
create table p1f as select a.*, b.number_of_inspections as num_inspection
from p1e as a left join inspections as b
on a.dunsnumber=b.dunsnumber and a.year=b.year; quit;

data p1f;
set p1f;
inspect=0;
if num_inspection>0 and num_inspection^=. then inspect=1;
if num_inspection=. then num_inspection=0;
run;

 PROC EXPORT DATA=p1f
            FILE="X:\Project_EPA\data\peertrimf_inspect.dta"
            DBMS=STATA REPLACE;
RUN;


data Pollution_control;
set epadata.Pollution_control_treated;
if reduct_type="Practice_1" then reduce_type=11;
if reduct_type="Practice_2" then reduce_type=12;
if reduct_type="Practice_3" then reduce_type=13;
if reduct_type="Practice_4" then reduce_type=14;
if reduct_type="Process_1" then reduce_type=21;
if reduct_type="Process_2" then reduce_type=22;
if reduct_type="Process_3" then reduce_type=23;
if reduct_type="Process_4" then reduce_type=24;
if reduct_type="Process_5" then reduce_type=25;
if reduct_type="Process_6" then reduce_type=26;
if reduct_type="Process_7" then reduce_type=27;
if reduct_type="Process_8" then reduce_type=28;
if reduct_type="Process_9" then reduce_type=29;
run;
data Pollution_control;
set Pollution_control;
practice=0;
if reduce_type<=14 then practice=1;
Process=0;
if reduce_type>14 then Process=1;
run;
data Pollution_controla;
set Pollution_control;
if practice=1;
run;
proc sql;
create table Pollution_control1 as select distinct a.TRI_Facility_ID, a.year, a.practice, sum(treated_quantity) as practice_quant
from Pollution_controla as a
group by TRI_Facility_ID, year; quit;
data Pollution_controlb;
set Pollution_control;
if Process=1;
run;
proc sql;
create table Pollution_control2 as select distinct a.TRI_Facility_ID, a.year, a.Process, sum(treated_quantity) as process_quant
from Pollution_controlb as a
group by TRI_Facility_ID, year; quit;

proc sort data=Pollution_control out=Pollution_controlc nodupkey; by TRI_Facility_ID year; run;

proc sql;
create table Pollution_controld as select a.TRI_Facility_ID, a.year, b.practice, b.practice_quant
from Pollution_controlc as a left join Pollution_control1 as b
on a.TRI_Facility_ID=b.TRI_Facility_ID and a.year=b.year; quit;
proc sql;
create table Pollution_control3 as select a.*,  b.Process, b.process_quant
from Pollution_controld as a left join Pollution_control2 as b
on a.TRI_Facility_ID=b.TRI_Facility_ID and a.year=b.year; quit;

data Pollution_control3;
set Pollution_control3;
if practice=. then practice=0;
if practice_quant=. then practice_quant=0;
if Process=. then Process=0;
if process_quant=. then process_quant=0;
run;

data Pollution_control3;
 set Pollution_control3;
 format TRI_Facility_ID $50.;
 informat TRI_Facility_ID $50.;
 run;

 proc sql;
 create table Pollution_control4 as select a.DunsNumber, b.*
 from nets_tri as a left join Pollution_control3 as b
 on a.TRIFD=b.TRI_Facility_ID and a.year=b.year; quit;

data Pollution_control4;
set Pollution_control4;
if practice=. then practice=0;
if practice_quant=. then practice_quant=0;
if Process=. then Process=0;
if process_quant=. then process_quant=0;
run;

proc sql;
create table p1f as select distinct a.*, b.*
from p1e as a, Pollution_control4  as b
where a.DunsNumber=b.DunsNumber and a.year=b.year; quit;
 PROC EXPORT DATA=p1f
            FILE="X:\Project_EPA\data\peertrimf_process.dta"
            DBMS=STATA REPLACE;
RUN;

/************************************************************
Nyplant_mf_all1: NYC SRMF and plant pair-year information;
Nyplant_nsrmf_all1: NYC non-SRMF and plant pair-year information;
Nyctaximf_all1: NYC SRMF taxi visits to local plants;
Nyctaxi_nsrmf_all1: NYC non-SRMF taxi visits to local plants;
*************************************************************/
data nyc_zip;
set epadata.nyc_zip;
zip1=zip5*1;
run;

proc sql;
create table p1f as select a.*, b.zip1
from p1e as a left join nyc_zip as b
on a.plant_zip=b.zip1; quit;

data nyplant0;
set p1f;
if zip1^=. ;
if self_enforce=1 then delete;
 if year>=2009;
run;
data Nyplant_mf_all1;
set epadata.Nyplant_mf_all1;
srmf=1;
run;
proc sort data=Nyplant_mf_all1 nodupkey; by gvkey dunsnumber wficn2 year; run;
data Nyplant_nsrmf_all1;
set epadata.Nyplant_nsrmf_all1;
srmf=0;
run;
proc sort data=Nyplant_nsrmf_all1 nodupkey; by gvkey dunsnumber wficn2 year; run;
data plantmf_all;
set Nyplant_mf_all1 Nyplant_nsrmf_all1;
run;

proc sort data=plantmf_all; by gvkey dunsnumber wficn2 year descending srmf; run;
proc sort data=plantmf_all nodupkey; by gvkey dunsnumber wficn2 year; run;

data Nyctaximf_all1;
set epadata.Nyctaximf_all1;
trip=1;
if distance0<=0.01 and distance1<=0.01;
srmf=1;
run;

proc sql;
create table Nyctaximf_all2 as select distinct a.gvkey, a.gvkey1, a.dunsnumber, a.year, a.wficn2, a.srmf, sum(trip) as num_mftrip 
from Nyctaximf_all1 as a
group by gvkey1, dunsnumber, year, wficn2; quit;

data Nyctaxi_nsrmf1;
set epadata.Nyctaxi_nsrmf_all1;
trip=1;
if distance0<=0.01 and distance1<=0.01;
srmf=0;
run;

proc sql;
create table Nyctaxi_nsrmf2 as select distinct a.gvkey, a.gvkey1, a.dunsnumber, a.year, a.wficn2, a.srmf, sum(trip) as num_mftrip 
from Nyctaxi_nsrmf1 as a
group by gvkey1, dunsnumber, year, wficn2;  quit;

data taximf;
set Nyctaximf_all2 Nyctaxi_nsrmf2;
run;
proc sort data=taximf; by gvkey dunsnumber wficn2 year descending srmf; run;
proc sort data=taximf nodupkey; by gvkey dunsnumber wficn2 year; run;

proc sql;
create table plantmf_all1 as select a.*, b.num_mftrip
from plantmf_all as a left join taximf as b
on a.gvkey=b.gvkey and a.dunsnumber=b.dunsnumber and a.year=b.year and a.wficn2=b.wficn2; quit;

data plantmf_all1;
set plantmf_all1;
if num_mftrip=. then num_mftrip=0;
run;

data nyplant0a;
set nyplant0;
treat_post=treat1*post1;
run;
proc sort data=nyplant0a nodupkey; by gvkey dunsnumber year cohort; run;
proc sql;
create table plantmf_all1a as select distinct a.*, b.treat1, b.post1, b.cohort
from plantmf_all1 as a,   nyplant0a as b
where a.gvkey=b.gvkey and a.dunsnumber=b.dunsnumber and a.year=b.year; quit;
data plantmf_all1a;
set plantmf_all1a;
mfvisit=0;
if num_mftrip>0 and num_mftrip^=. then mfvisit=1;
if num_mftrip=. then num_mftrip=0;
run;
proc sort data=plantmf_all1a nodupkey; by gvkey dunsnumber year cohort wficn2; run;
data plantmf_all1a;
set plantmf_all1a;
distance2mf=geodist(Latitude2mf,Longitude2mf,Latitude,Longitude, 'M');
run;

data plantmf_all1b;
set plantmf_all1a;
if distance2mf>100 then delete;
run;

proc sql;
create table plantmf_all1c as select a.*, b.area
from plantmf_all1b as a left join nyc_zip as b
on a.plant_zip=b.zip1; quit;
 PROC EXPORT DATA=plantmf_all1c
            FILE="X:\Project_EPA\data\plantmf_level.dta"
            DBMS=STATA REPLACE;
RUN;
